BST 260 Final Project Group: Chuying Ma, Jingjing Tang, Jie Yin, Xuewei Zhang
This Rmd file is for studying the relationship between other variables and mental health:
Mental health patterns in 2000 for all US counties in a map:
library(maps)
library(RColorBrewer)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
data_2000 = read.csv("data_2000.csv")
ment_2000 <- data_2000 %>%
dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_00 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_00 <- inner_join(men_map_00, ment_2000, by=c("fips" = "fips") )
map_00 = ggplot(men_df_00, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_00
mental health patterns in 2001 for all US counties in a map:
data_2001 = read.csv("data_2001.csv")
ment_2001 <- data_2001 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_01 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_01 <- inner_join(men_map_01, ment_2001, by=c("fips" = "fips") )
map_01 = ggplot(men_df_01, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_01
mental health patterns in 2002 for all US counties in a map:
data_2002 = read.csv("data_2002.csv")
ment_2002 <- data_2002 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_02 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_02 <- inner_join(men_map_02, ment_2002, by=c("fips" = "fips") )
map_02 = ggplot(men_df_02, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_02
mental health patterns in 2003 for all US counties in a map:
data_2003 = read.csv("data_2003.csv")
ment_2003 <- data_2003 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_03 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_03 <- inner_join(men_map_03, ment_2003, by=c("fips" = "fips") )
map_03 = ggplot(men_df_03, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_03
mental health patterns in 2004 for all US counties in a map:
data_2004 = read.csv("data_2004.csv")
ment_2004 <- data_2004 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_04 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_04 <- inner_join(men_map_04, ment_2004, by=c("fips" = "fips") )
map_04 = ggplot(men_df_04, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_04
mental health patterns in 2005 for all US counties in a map:
data_2005 = read.csv("data_2005.csv")
ment_2005 <- data_2005 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_05 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_05 <- inner_join(men_map_05, ment_2005, by=c("fips" = "fips"))
map_05 = ggplot(men_df_05, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_05
mental health patterns in 2006 for all US counties in a map:
data_2006 = read.csv("data_2006.csv")
ment_2006 <- data_2006 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_06 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_06 <- inner_join(men_map_06, ment_2006, by=c("fips" = "fips"))
map_06 = ggplot(men_df_06, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_06
mental health patterns in 2007 for all US counties in a map:
data_2007 = read.csv("data_2007.csv")
ment_2007 <- data_2007 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_07 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_07 <- inner_join(men_map_07, ment_2007, by=c("fips" = "fips"))
map_07 = ggplot(men_df_07, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_07
mental health patterns in 2008 for all US counties in a map:
data_2008 = read.csv("2008_data.csv")
ment_2008 <- data_2008 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_08 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_08 <- inner_join(men_map_08, ment_2008, by=c("fips" = "fips"))
map_08 = ggplot(men_df_08, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_08
mental health patterns in 2009 for all US counties in a map:
data_2009 = read.csv("2009_data.csv")
ment_2009 <- data_2009 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_09 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_09 <- inner_join(men_map_09, ment_2009, by=c("fips" = "fips"))
map_09 = ggplot(men_df_09, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_09
mental health patterns in 2010 for all US counties in a map:
data_2010 = read.csv("2010_data.csv")
ment_2010 <- data_2010 %>% dplyr::select(fips,menthlth)
data(county.fips)
cnty <- map_data("county")
men_map_10 <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df_10 <- inner_join(men_map_10, ment_2010, by=c("fips" = "fips"))
map_10 = ggplot(men_df_10, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_10
map in the same plot using facet:
mental_data = data.frame()
mental_data = rbind(data_2000,data_2001,data_2002)
mental_data = rbind(mental_data,data_2003,data_2004)
mental_data = rbind(mental_data,data_2005,data_2006)
mental_data = rbind(mental_data,data_2007,data_2008,data_2009,data_2010)
write_csv(mental_data,"10_year_data.csv")
mental_data = read_csv("10_year_data.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
ment <- mental_data %>% dplyr::select(fips,menthlth,year)
cnty <- map_data("county")
men_map <- cnty %>%
mutate(polyname = paste(region,subregion,sep=",")) %>%
left_join(county.fips, by="polyname")
men_df <- inner_join(men_map, ment, by=c("fips" = "fips"))
map_all_year = ggplot(men_df, aes(long, lat,group = group)) +
geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt") +
facet_wrap(~year,ncol = 3) +
ggtitle("Number of Days Mental Health Not Good in US counties from 2000 to 2010")
map_all_year
Are you living in the states where people are more unhappier? Be careful if you live at Kentucky, West Virginia or Puerto Rico! In those states, people who had assess to the BRFSS survey reported that they spent more than half of their days in a month in a bad mental health conditions.
Do you think people become unhappier than before at state level? The answer is probably yes as the color tends to be darker year by year.
write state level data for exposure:
combine with pm2.5 and ozone:
pm2.5 = read_csv("PM25_Ozone_county_2000_2012.csv")
pm2.5$COUNTY = as.numeric(pm2.5$COUNTY)
pm2.5_00 = pm2.5 %>%
select(COUNTY,PM25_2000,Ozone_2000)
data_2000 = data_2000 %>%
left_join(pm2.5_00,by = c("fips" = "COUNTY"))
colnames(data_2000)[48] = "pm2.5"
colnames(data_2000)[49] = "ozone"
write_csv(data_2000,"data_00_expo.csv")
pm2.5_01 = pm2.5 %>%
select(COUNTY,PM25_2001,Ozone_2001)
data_2001 = data_2001 %>%
left_join(pm2.5_01,by = c("fips" = "COUNTY"))
colnames(data_2001)[48] = "pm2.5"
colnames(data_2001)[49] = "ozone"
write_csv(data_2001,"data_01_expo.csv")
pm2.5_02 = pm2.5 %>%
select(COUNTY,PM25_2002,Ozone_2002)
data_2002 = data_2002 %>%
left_join(pm2.5_02,by = c("fips" = "COUNTY"))
colnames(data_2002)[48] = "pm2.5"
colnames(data_2002)[49] = "ozone"
write_csv(data_2002,"data_02_expo.csv")
pm2.5_03 = pm2.5 %>%
select(COUNTY,PM25_2003,Ozone_2003)
data_2003 = data_2003 %>%
left_join(pm2.5_03,by = c("fips" = "COUNTY"))
colnames(data_2003)[48] = "pm2.5"
colnames(data_2003)[49] = "ozone"
write_csv(data_2003,"data_03_expo.csv")
pm2.5_04 = pm2.5 %>%
select(COUNTY,PM25_2004,Ozone_2004)
data_2004 = data_2004 %>%
left_join(pm2.5_04,by = c("fips" = "COUNTY"))
colnames(data_2004)[48] = "pm2.5"
colnames(data_2004)[49] = "ozone"
write_csv(data_2004,"data_04_expo.csv")
pm2.5_05 = pm2.5 %>%
select(COUNTY,PM25_2005,Ozone_2005)
data_2005 = data_2005 %>%
left_join(pm2.5_05,by = c("fips" = "COUNTY"))
colnames(data_2005)[48] = "pm2.5"
colnames(data_2005)[49] = "ozone"
write_csv(data_2005,"data_05_expo.csv")
pm2.5_06 = pm2.5 %>%
select(COUNTY,PM25_2006,Ozone_2006)
data_2006 = data_2006 %>%
left_join(pm2.5_06,by = c("fips" = "COUNTY"))
colnames(data_2006)[48] = "pm2.5"
colnames(data_2006)[49] = "ozone"
write_csv(data_2006,"data_06_expo.csv")
pm2.5_07 = pm2.5 %>%
select(COUNTY,PM25_2007,Ozone_2007)
data_2007 = data_2007 %>%
left_join(pm2.5_07,by = c("fips" = "COUNTY"))
colnames(data_2007)[48] = "pm2.5"
colnames(data_2007)[49] = "ozone"
write_csv(data_2007,"data_07_expo.csv")
pm2.5_08 = pm2.5 %>%
select(COUNTY,PM25_2008,Ozone_2008)
data_2008 = data_2008 %>%
left_join(pm2.5_08,by = c("fips" = "COUNTY"))
colnames(data_2008)[48] = "pm2.5"
colnames(data_2008)[49] = "ozone"
write_csv(data_2008,"data_08_expo.csv")
pm2.5_09 = pm2.5 %>%
select(COUNTY,PM25_2009,Ozone_2009)
data_2009 = data_2009 %>%
left_join(pm2.5_09,by = c("fips" = "COUNTY"))
colnames(data_2009)[48] = "pm2.5"
colnames(data_2009)[49] = "ozone"
write_csv(data_2009,"data_09_expo.csv")
pm2.5_10 = pm2.5 %>%
select(COUNTY,PM25_2010,Ozone_2010)
data_2010 = data_2010 %>%
left_join(pm2.5_10,by = c("fips" = "COUNTY"))
colnames(data_2010)[48] = "pm2.5"
colnames(data_2010)[49] = "ozone"
write_csv(data_2010,"data_10_expo.csv")
combine all data to write a csv file for future use:
all_expo = rbind(data_2000,data_2001,data_2002,data_2003,data_2004,data_2005,data_2006,data_2007,data_2008,data_2009,data_2010)
write_csv(all_expo,"data_exposure_00_10.csv")
run linear regression for all covariates:
library(readr)
library(dplyr)
data_all = read_csv("data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
reg_data = data_all %>%
dplyr::select(-c(fips,state,county,year,heartattack,ACheartdis,stroke,asthma))
full_men_model = lm(menthlth ~ .,data = reg_data)
summary(full_men_model)
##
## Call:
## lm(formula = menthlth ~ ., data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2722 -1.0666 -0.0411 0.9740 21.1965
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.322e+02 3.610e+02 -0.366 0.714149
## age 2.678e-03 1.944e-02 0.138 0.890415
## sex 1.182e+00 6.650e-01 1.777 0.075624 .
## white -2.932e-02 1.384e-01 -0.212 0.832197
## black 9.434e-02 1.912e-01 0.493 0.621805
## asian -5.513e-01 4.068e-01 -1.355 0.175451
## race_other NA NA NA NA
## educ1 1.030e+00 1.340e+00 0.769 0.442048
## educ2 1.552e+00 5.739e-01 2.704 0.006884 **
## educ3 NA NA NA NA
## income1 -7.363e+00 1.303e+00 -5.651 1.71e-08 ***
## income2 -1.078e-01 1.194e+00 -0.090 0.928069
## income3 -2.024e-01 1.115e+00 -0.182 0.855901
## income4 -2.392e+00 9.302e-01 -2.572 0.010156 *
## income5 -5.853e-01 8.726e-01 -0.671 0.502461
## income6 -1.296e+00 8.307e-01 -1.561 0.118713
## income7 -8.315e-01 8.679e-01 -0.958 0.338087
## income8 NA NA NA NA
## married -1.362e+00 5.790e-01 -2.352 0.018719 *
## unmarried NA NA NA NA
## employed -3.149e+00 8.035e-01 -3.919 9.04e-05 ***
## out_of_work 5.473e+00 1.451e+00 3.773 0.000164 ***
## homemaker -2.828e+00 1.183e+00 -2.390 0.016897 *
## student -3.301e+00 2.666e+00 -1.238 0.215814
## employ_other NA NA NA NA
## hlthplan -3.380e+00 8.761e-01 -3.859 0.000116 ***
## exercise -1.243e+00 6.962e-01 -1.786 0.074257 .
## smoke 4.265e+00 7.806e-01 5.463 4.98e-08 ***
## drink -3.131e+00 4.488e-01 -6.977 3.55e-12 ***
## bmi_cat1 -1.759e+00 1.466e+00 -1.200 0.230116
## bmi_cat2 -1.531e+00 1.087e+00 -1.408 0.159127
## bmi_cat3 NA NA NA NA
## bmi_cts -8.387e-02 9.851e-02 -0.851 0.394641
## genhlth_ex 1.514e+02 3.610e+02 0.419 0.675007
## genhlth_vg 1.489e+02 3.610e+02 0.413 0.679974
## genhlth_go 1.498e+02 3.609e+02 0.415 0.678214
## genhlth_fa 1.515e+02 3.609e+02 0.420 0.674751
## genhlth_po 1.627e+02 3.609e+02 0.451 0.652257
## qlactlm 4.341e+00 7.357e-01 5.901 3.94e-09 ***
## pm2.5 -8.277e-04 1.906e-02 -0.043 0.965356
## ozone 1.394e+01 7.910e+00 1.763 0.078051 .
## greenness 1.469e+00 3.530e-01 4.162 3.22e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.192 on 3715 degrees of freedom
## (13135 observations deleted due to missingness)
## Multiple R-squared: 0.3877, Adjusted R-squared: 0.382
## F-statistic: 67.22 on 35 and 3715 DF, p-value: < 2.2e-16
delete income2:
mod1 = lm(menthlth ~ age + sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod1)
##
## Call:
## lm(formula = menthlth ~ age + sex + white + black + asian + educ1 +
## educ2 + income1 + income3 + income4 + income5 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + bmi_cat1 +
## bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2817 -1.0664 -0.0413 0.9675 21.1811
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.340e+02 3.594e+02 -0.373 0.709296
## age 2.257e-03 1.918e-02 0.118 0.906355
## sex 1.154e+00 6.607e-01 1.747 0.080694 .
## white -2.902e-02 1.376e-01 -0.211 0.832967
## black 9.504e-02 1.897e-01 0.501 0.616401
## asian -5.149e-01 3.950e-01 -1.303 0.192488
## educ1 9.588e-01 1.313e+00 0.730 0.465443
## educ2 1.490e+00 5.509e-01 2.705 0.006869 **
## income1 -7.343e+00 1.267e+00 -5.794 7.45e-09 ***
## income3 -1.800e-01 1.071e+00 -0.168 0.866523
## income4 -2.345e+00 8.879e-01 -2.641 0.008306 **
## income5 -5.430e-01 8.462e-01 -0.642 0.521096
## income6 -1.293e+00 8.018e-01 -1.613 0.106793
## income7 -8.431e-01 8.409e-01 -1.003 0.316118
## married -1.348e+00 5.555e-01 -2.427 0.015278 *
## employed -3.133e+00 7.870e-01 -3.982 6.97e-05 ***
## out_of_work 5.417e+00 1.440e+00 3.762 0.000171 ***
## homemaker -2.852e+00 1.173e+00 -2.432 0.015074 *
## student -3.242e+00 2.647e+00 -1.225 0.220729
## hlthplan -3.399e+00 8.625e-01 -3.941 8.25e-05 ***
## exercise -1.286e+00 6.911e-01 -1.861 0.062830 .
## smoke 4.293e+00 7.734e-01 5.550 3.05e-08 ***
## drink -3.153e+00 4.426e-01 -7.123 1.26e-12 ***
## bmi_cat1 -1.765e+00 1.456e+00 -1.212 0.225573
## bmi_cat2 -1.520e+00 1.080e+00 -1.407 0.159405
## bmi_cts -8.627e-02 9.788e-02 -0.881 0.378169
## genhlth_ex 1.533e+02 3.593e+02 0.427 0.669732
## genhlth_vg 1.508e+02 3.593e+02 0.420 0.674759
## genhlth_go 1.516e+02 3.593e+02 0.422 0.673026
## genhlth_fa 1.534e+02 3.593e+02 0.427 0.669538
## genhlth_po 1.646e+02 3.593e+02 0.458 0.647021
## qlactlm 4.286e+00 7.286e-01 5.882 4.42e-09 ***
## pm2.5 8.248e-04 1.876e-02 0.044 0.964932
## ozone 1.383e+01 7.797e+00 1.774 0.076140 .
## greenness 1.491e+00 3.486e-01 4.276 1.95e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3758 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3847
## F-statistic: 70.73 on 34 and 3758 DF, p-value: < 2.2e-16
delete age:
mod2 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod2)
##
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 +
## educ2 + income1 + income3 + income4 + income5 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + bmi_cat1 +
## bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2667 -1.0645 -0.0422 0.9689 21.2022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.352e+02 3.592e+02 -0.377 0.70656
## sex 1.156e+00 6.604e-01 1.751 0.08007 .
## white -2.868e-02 1.376e-01 -0.208 0.83488
## black 9.442e-02 1.896e-01 0.498 0.61851
## asian -5.182e-01 3.940e-01 -1.315 0.18859
## educ1 9.475e-01 1.310e+00 0.723 0.46945
## educ2 1.494e+00 5.500e-01 2.716 0.00665 **
## income1 -7.354e+00 1.263e+00 -5.821 6.35e-09 ***
## income3 -1.702e-01 1.068e+00 -0.159 0.87336
## income4 -2.342e+00 8.874e-01 -2.639 0.00836 **
## income5 -5.411e-01 8.459e-01 -0.640 0.52242
## income6 -1.297e+00 8.012e-01 -1.619 0.10557
## income7 -8.450e-01 8.406e-01 -1.005 0.31486
## married -1.347e+00 5.553e-01 -2.425 0.01534 *
## employed -3.195e+00 5.852e-01 -5.461 5.05e-08 ***
## out_of_work 5.358e+00 1.349e+00 3.972 7.26e-05 ***
## homemaker -2.901e+00 1.096e+00 -2.646 0.00818 **
## student -3.364e+00 2.435e+00 -1.382 0.16719
## hlthplan -3.385e+00 8.540e-01 -3.964 7.51e-05 ***
## exercise -1.292e+00 6.889e-01 -1.876 0.06075 .
## smoke 4.268e+00 7.436e-01 5.739 1.03e-08 ***
## drink -3.147e+00 4.397e-01 -7.156 9.91e-13 ***
## bmi_cat1 -1.779e+00 1.451e+00 -1.226 0.22014
## bmi_cat2 -1.528e+00 1.078e+00 -1.417 0.15650
## bmi_cts -8.779e-02 9.701e-02 -0.905 0.36556
## genhlth_ex 1.547e+02 3.591e+02 0.431 0.66657
## genhlth_vg 1.523e+02 3.591e+02 0.424 0.67157
## genhlth_go 1.531e+02 3.591e+02 0.426 0.66983
## genhlth_fa 1.548e+02 3.591e+02 0.431 0.66635
## genhlth_po 1.660e+02 3.591e+02 0.462 0.64390
## qlactlm 4.289e+00 7.278e-01 5.893 4.12e-09 ***
## pm2.5 6.537e-04 1.870e-02 0.035 0.97212
## ozone 1.376e+01 7.770e+00 1.771 0.07671 .
## greenness 1.489e+00 3.482e-01 4.275 1.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3759 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3849
## F-statistic: 72.9 on 33 and 3759 DF, p-value: < 2.2e-16
delete income3:
mod3 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod3)
##
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 +
## educ2 + income1 + income4 + income5 + income6 + income7 +
## married + employed + out_of_work + homemaker + student +
## hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 +
## bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2744 -1.0633 -0.0421 0.9680 21.2147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.356e+02 3.591e+02 -0.378 0.70580
## sex 1.145e+00 6.566e-01 1.744 0.08126 .
## white -2.893e-02 1.375e-01 -0.210 0.83339
## black 9.551e-02 1.895e-01 0.504 0.61420
## asian -5.173e-01 3.939e-01 -1.313 0.18925
## educ1 9.197e-01 1.298e+00 0.709 0.47861
## educ2 1.475e+00 5.371e-01 2.746 0.00606 **
## income1 -7.308e+00 1.229e+00 -5.945 3.02e-09 ***
## income4 -2.314e+00 8.702e-01 -2.659 0.00787 **
## income5 -5.186e-01 8.339e-01 -0.622 0.53407
## income6 -1.277e+00 7.909e-01 -1.614 0.10656
## income7 -8.203e-01 8.261e-01 -0.993 0.32078
## married -1.334e+00 5.490e-01 -2.429 0.01517 *
## employed -3.182e+00 5.795e-01 -5.492 4.24e-08 ***
## out_of_work 5.360e+00 1.349e+00 3.974 7.19e-05 ***
## homemaker -2.880e+00 1.088e+00 -2.646 0.00818 **
## student -3.372e+00 2.434e+00 -1.386 0.16591
## hlthplan -3.359e+00 8.375e-01 -4.010 6.18e-05 ***
## exercise -1.294e+00 6.887e-01 -1.880 0.06023 .
## smoke 4.264e+00 7.431e-01 5.737 1.04e-08 ***
## drink -3.139e+00 4.368e-01 -7.186 7.99e-13 ***
## bmi_cat1 -1.791e+00 1.449e+00 -1.236 0.21650
## bmi_cat2 -1.538e+00 1.076e+00 -1.430 0.15288
## bmi_cts -8.859e-02 9.687e-02 -0.915 0.36049
## genhlth_ex 1.550e+02 3.590e+02 0.432 0.66587
## genhlth_vg 1.526e+02 3.590e+02 0.425 0.67088
## genhlth_go 1.534e+02 3.590e+02 0.427 0.66915
## genhlth_fa 1.551e+02 3.590e+02 0.432 0.66567
## genhlth_po 1.663e+02 3.590e+02 0.463 0.64322
## qlactlm 4.283e+00 7.265e-01 5.895 4.08e-09 ***
## pm2.5 8.487e-04 1.866e-02 0.045 0.96372
## ozone 1.379e+01 7.767e+00 1.775 0.07596 .
## greenness 1.492e+00 3.477e-01 4.291 1.82e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.182 on 3760 degrees of freedom
## (13093 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.385
## F-statistic: 75.19 on 32 and 3760 DF, p-value: < 2.2e-16
delete white:
mod4 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod4)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income5 + income6 + income7 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts +
## genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3060 -1.0587 -0.0427 0.9594 21.1859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.369e+02 3.590e+02 -0.381 0.70293
## sex 1.151e+00 6.557e-01 1.755 0.07928 .
## black 1.351e-01 1.502e-01 0.899 0.36860
## asian -3.681e-01 3.662e-01 -1.005 0.31483
## educ1 9.121e-01 1.297e+00 0.703 0.48189
## educ2 1.495e+00 5.358e-01 2.791 0.00528 **
## income1 -7.303e+00 1.228e+00 -5.947 2.99e-09 ***
## income4 -2.279e+00 8.692e-01 -2.622 0.00877 **
## income5 -5.108e-01 8.319e-01 -0.614 0.53927
## income6 -1.271e+00 7.896e-01 -1.610 0.10747
## income7 -8.344e-01 8.250e-01 -1.011 0.31190
## married -1.285e+00 5.474e-01 -2.348 0.01893 *
## employed -3.170e+00 5.788e-01 -5.476 4.64e-08 ***
## out_of_work 5.329e+00 1.347e+00 3.957 7.73e-05 ***
## homemaker -2.845e+00 1.088e+00 -2.616 0.00893 **
## student -3.195e+00 2.429e+00 -1.315 0.18844
## hlthplan -3.434e+00 8.366e-01 -4.105 4.12e-05 ***
## exercise -1.324e+00 6.877e-01 -1.926 0.05423 .
## smoke 4.271e+00 7.418e-01 5.757 9.25e-09 ***
## drink -3.123e+00 4.362e-01 -7.159 9.73e-13 ***
## bmi_cat1 -1.733e+00 1.447e+00 -1.198 0.23107
## bmi_cat2 -1.517e+00 1.075e+00 -1.411 0.15830
## bmi_cts -8.471e-02 9.681e-02 -0.875 0.38165
## genhlth_ex 1.563e+02 3.589e+02 0.436 0.66322
## genhlth_vg 1.538e+02 3.589e+02 0.429 0.66830
## genhlth_go 1.546e+02 3.589e+02 0.431 0.66662
## genhlth_fa 1.564e+02 3.589e+02 0.436 0.66305
## genhlth_po 1.676e+02 3.589e+02 0.467 0.64051
## qlactlm 4.226e+00 7.248e-01 5.830 6.01e-09 ***
## pm2.5 4.473e-04 1.862e-02 0.024 0.98083
## ozone 1.365e+01 7.735e+00 1.764 0.07775 .
## greenness 1.505e+00 3.462e-01 4.349 1.41e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.181 on 3773 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.385
## F-statistic: 77.81 on 31 and 3773 DF, p-value: < 2.2e-16
delete genhlth_vg:
mod5 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod5)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income5 + income6 + income7 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts +
## genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3048 -1.0580 -0.0423 0.9593 21.1851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.9059315 3.8083300 4.439 9.29e-06 ***
## sex 1.1545614 0.6555309 1.761 0.07828 .
## black 0.1350305 0.1501968 0.899 0.36870
## asian -0.3668663 0.3661573 -1.002 0.31644
## educ1 0.9039184 1.2965802 0.697 0.48575
## educ2 1.4915840 0.5356231 2.785 0.00538 **
## income1 -7.3075294 1.2279432 -5.951 2.91e-09 ***
## income4 -2.2781783 0.8691468 -2.621 0.00880 **
## income5 -0.5170001 0.8316940 -0.622 0.53423
## income6 -1.2732501 0.7895452 -1.613 0.10691
## income7 -0.8456256 0.8245048 -1.026 0.30514
## married -1.2867348 0.5473429 -2.351 0.01878 *
## employed -3.1773746 0.5784989 -5.492 4.23e-08 ***
## out_of_work 5.3258498 1.3465454 3.955 7.79e-05 ***
## homemaker -2.8495650 1.0874395 -2.620 0.00882 **
## student -3.2042958 2.4285592 -1.319 0.18711
## hlthplan -3.4410065 0.8363564 -4.114 3.97e-05 ***
## exercise -1.3249926 0.6876680 -1.927 0.05408 .
## smoke 4.2618061 0.7414596 5.748 9.75e-09 ***
## drink -3.1209840 0.4361487 -7.156 9.95e-13 ***
## bmi_cat1 -1.7450688 1.4464325 -1.206 0.22771
## bmi_cat2 -1.5219590 1.0751466 -1.416 0.15698
## bmi_cts -0.0850834 0.0967951 -0.879 0.37945
## genhlth_ex 2.5089983 0.9950005 2.522 0.01172 *
## genhlth_go 0.8268782 0.8226417 1.005 0.31489
## genhlth_fa 2.5939973 1.0807838 2.400 0.01644 *
## genhlth_po 13.8189923 1.4273400 9.682 < 2e-16 ***
## qlactlm 4.2227559 0.7247162 5.827 6.12e-09 ***
## pm2.5 0.0003147 0.0186119 0.017 0.98651
## ozone 13.7257738 7.7322703 1.775 0.07596 .
## greenness 1.5066414 0.3461142 4.353 1.38e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.181 on 3774 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.3851
## F-statistic: 80.42 on 30 and 3774 DF, p-value: < 2.2e-16
delete income5:
mod6 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod6)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 +
## income1 + income4 + income6 + income7 + married + employed +
## out_of_work + homemaker + student + hlthplan + exercise +
## smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2400 -1.0615 -0.0447 0.9645 21.1293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.7970457 3.8039904 4.416 1.04e-05 ***
## sex 1.1666259 0.6551902 1.781 0.07506 .
## black 0.1392310 0.1500326 0.928 0.35346
## asian -0.3613184 0.3660188 -0.987 0.32363
## educ1 0.9163116 1.2963215 0.707 0.47970
## educ2 1.4154634 0.5213950 2.715 0.00666 **
## income1 -7.1717625 1.2082655 -5.936 3.19e-09 ***
## income4 -2.2455138 0.8674864 -2.589 0.00968 **
## income6 -1.2078350 0.7824373 -1.544 0.12275
## income7 -0.7670658 0.8146955 -0.942 0.34649
## married -1.2712945 0.5467346 -2.325 0.02011 *
## employed -3.1449712 0.5760988 -5.459 5.09e-08 ***
## out_of_work 5.3854958 1.3430132 4.010 6.19e-05 ***
## homemaker -2.8026699 1.0847314 -2.584 0.00981 **
## student -3.2040596 2.4283617 -1.319 0.18710
## hlthplan -3.3971077 0.8333019 -4.077 4.66e-05 ***
## exercise -1.3410662 0.6871259 -1.952 0.05105 .
## smoke 4.2415793 0.7406851 5.727 1.10e-08 ***
## drink -3.1214711 0.4361125 -7.157 9.83e-13 ***
## bmi_cat1 -1.7463674 1.4463134 -1.207 0.22733
## bmi_cat2 -1.5368974 1.0747906 -1.430 0.15281
## bmi_cts -0.0868244 0.0967467 -0.897 0.36954
## genhlth_ex 2.5422568 0.9934802 2.559 0.01054 *
## genhlth_go 0.8205911 0.8225127 0.998 0.31851
## genhlth_fa 2.6165495 1.0800869 2.423 0.01546 *
## genhlth_po 13.8579888 1.4258449 9.719 < 2e-16 ***
## qlactlm 4.2151283 0.7245534 5.818 6.47e-09 ***
## pm2.5 0.0006723 0.0186015 0.036 0.97117
## ozone 13.9745241 7.7212811 1.810 0.07040 .
## greenness 1.5185102 0.3455590 4.394 1.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3775 degrees of freedom
## (13081 observations deleted due to missingness)
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.3852
## F-statistic: 83.19 on 29 and 3775 DF, p-value: < 2.2e-16
delete educ1:
mod7 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod7)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1891 -1.0537 -0.0430 0.9667 21.1571
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.931884 3.792287 4.465 8.25e-06 ***
## sex 1.132782 0.653317 1.734 0.08302 .
## black 0.136321 0.149746 0.910 0.36270
## asian -0.362910 0.365669 -0.992 0.32104
## educ2 1.350864 0.509700 2.650 0.00808 **
## income1 -6.978244 1.174115 -5.943 3.04e-09 ***
## income4 -2.237425 0.865841 -2.584 0.00980 **
## income6 -1.204154 0.781912 -1.540 0.12364
## income7 -0.827323 0.811953 -1.019 0.30830
## married -1.240633 0.545939 -2.272 0.02311 *
## employed -3.129805 0.575115 -5.442 5.60e-08 ***
## out_of_work 5.397968 1.341426 4.024 5.83e-05 ***
## homemaker -2.740568 1.080720 -2.536 0.01126 *
## student -3.370241 2.422361 -1.391 0.16421
## hlthplan -3.462430 0.825121 -4.196 2.78e-05 ***
## exercise -1.382524 0.684225 -2.021 0.04339 *
## smoke 4.194612 0.736235 5.697 1.31e-08 ***
## drink -3.145698 0.434747 -7.236 5.58e-13 ***
## bmi_cat1 -1.737268 1.445301 -1.202 0.22943
## bmi_cat2 -1.518598 1.074117 -1.414 0.15750
## bmi_cts -0.087568 0.096671 -0.906 0.36508
## genhlth_ex 2.520727 0.992915 2.539 0.01117 *
## genhlth_go 0.851434 0.819769 1.039 0.29904
## genhlth_fa 2.781992 1.057813 2.630 0.00857 **
## genhlth_po 14.073221 1.390628 10.120 < 2e-16 ***
## qlactlm 4.152048 0.719144 5.774 8.38e-09 ***
## pm2.5 0.002109 0.018546 0.114 0.90948
## ozone 14.018889 7.712750 1.818 0.06920 .
## greenness 1.494825 0.344899 4.334 1.50e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3779 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3856
## F-statistic: 86.35 on 28 and 3779 DF, p-value: < 2.2e-16
delete bmi_cts:
mod8 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod8)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2559 -1.0592 -0.0402 0.9714 21.1316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.809691 1.581582 8.732 < 2e-16 ***
## sex 1.108766 0.652763 1.699 0.08948 .
## black 0.131590 0.149651 0.879 0.37929
## asian -0.355522 0.365569 -0.973 0.33086
## educ2 1.338162 0.509495 2.626 0.00866 **
## income1 -6.991240 1.173999 -5.955 2.84e-09 ***
## income4 -2.229043 0.865771 -2.575 0.01007 *
## income6 -1.190139 0.781740 -1.522 0.12799
## income7 -0.804173 0.811532 -0.991 0.32178
## married -1.231360 0.545830 -2.256 0.02413 *
## employed -3.152500 0.574555 -5.487 4.36e-08 ***
## out_of_work 5.337033 1.339706 3.984 6.91e-05 ***
## homemaker -2.722302 1.080507 -2.519 0.01179 *
## student -3.255842 2.419009 -1.346 0.17840
## hlthplan -3.432160 0.824425 -4.163 3.21e-05 ***
## exercise -1.336410 0.682312 -1.959 0.05023 .
## smoke 4.230261 0.735165 5.754 9.40e-09 ***
## drink -3.142121 0.434718 -7.228 5.90e-13 ***
## bmi_cat1 -0.596659 0.709483 -0.841 0.40041
## bmi_cat2 -0.825977 0.754364 -1.095 0.27362
## genhlth_ex 2.567698 0.991537 2.590 0.00965 **
## genhlth_go 0.852631 0.819748 1.040 0.29835
## genhlth_fa 2.769813 1.057703 2.619 0.00886 **
## genhlth_po 14.080355 1.390573 10.126 < 2e-16 ***
## qlactlm 4.097882 0.716637 5.718 1.16e-08 ***
## pm2.5 0.001368 0.018527 0.074 0.94116
## ozone 14.239468 7.708722 1.847 0.06480 .
## greenness 1.490424 0.344856 4.322 1.59e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3780 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.39, Adjusted R-squared: 0.3857
## F-statistic: 89.52 on 27 and 3780 DF, p-value: < 2.2e-16
delete bmi_cat1:
mod9 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod9)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2694 -1.0571 -0.0408 0.9764 21.0710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.455543 1.524429 8.827 < 2e-16 ***
## sex 1.069038 0.651026 1.642 0.10066
## black 0.140336 0.149283 0.940 0.34725
## asian -0.370013 0.365148 -1.013 0.31097
## educ2 1.414777 0.501264 2.822 0.00479 **
## income1 -6.946582 1.172752 -5.923 3.44e-09 ***
## income4 -2.190794 0.864542 -2.534 0.01132 *
## income6 -1.144794 0.779848 -1.468 0.14220
## income7 -0.777008 0.810857 -0.958 0.33800
## married -1.199491 0.544492 -2.203 0.02766 *
## employed -3.107439 0.572029 -5.432 5.91e-08 ***
## out_of_work 5.386927 1.338340 4.025 5.81e-05 ***
## homemaker -2.718863 1.080457 -2.516 0.01190 *
## student -3.252090 2.418911 -1.344 0.17889
## hlthplan -3.417345 0.824205 -4.146 3.45e-05 ***
## exercise -1.400126 0.678067 -2.065 0.03900 *
## smoke 4.150608 0.729009 5.693 1.34e-08 ***
## drink -3.164315 0.433900 -7.293 3.68e-13 ***
## bmi_cat2 -0.524877 0.663958 -0.791 0.42927
## genhlth_ex 2.428255 0.977537 2.484 0.01303 *
## genhlth_go 0.934257 0.813950 1.148 0.25112
## genhlth_fa 2.867366 1.051282 2.727 0.00641 **
## genhlth_po 14.126455 1.389438 10.167 < 2e-16 ***
## qlactlm 4.128102 0.715708 5.768 8.67e-09 ***
## pm2.5 0.002069 0.018508 0.112 0.91100
## ozone 13.982930 7.702386 1.815 0.06954 .
## greenness 1.502176 0.344560 4.360 1.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3781 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3899, Adjusted R-squared: 0.3857
## F-statistic: 92.94 on 26 and 3781 DF, p-value: < 2.2e-16
delete bmi_cat2:
mod10 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod10)
##
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 +
## income4 + income6 + income7 + married + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2751 -1.0612 -0.0432 0.9703 20.9489
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.233353 1.498220 8.833 < 2e-16 ***
## sex 1.106615 0.649257 1.704 0.08838 .
## black 0.136771 0.149208 0.917 0.35939
## asian -0.370158 0.365130 -1.014 0.31076
## educ2 1.387587 0.500058 2.775 0.00555 **
## income1 -6.952568 1.172670 -5.929 3.32e-09 ***
## income4 -2.171941 0.864170 -2.513 0.01200 *
## income6 -1.159868 0.779577 -1.488 0.13688
## income7 -0.808770 0.809821 -0.999 0.31800
## married -1.232731 0.542839 -2.271 0.02321 *
## employed -3.080563 0.570990 -5.395 7.27e-08 ***
## out_of_work 5.427554 1.337287 4.059 5.04e-05 ***
## homemaker -2.704300 1.080246 -2.503 0.01234 *
## student -3.178348 2.416992 -1.315 0.18859
## hlthplan -3.393198 0.823598 -4.120 3.87e-05 ***
## exercise -1.416326 0.677723 -2.090 0.03670 *
## smoke 4.147400 0.728962 5.689 1.37e-08 ***
## drink -3.178133 0.433526 -7.331 2.78e-13 ***
## genhlth_ex 2.461532 0.976582 2.521 0.01176 *
## genhlth_go 0.955399 0.813471 1.174 0.24028
## genhlth_fa 2.891422 1.050789 2.752 0.00596 **
## genhlth_po 14.153748 1.388940 10.190 < 2e-16 ***
## qlactlm 4.154299 0.714905 5.811 6.72e-09 ***
## pm2.5 0.002191 0.018506 0.118 0.90575
## ozone 13.926352 7.701671 1.808 0.07065 .
## greenness 1.500980 0.344539 4.356 1.36e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.18 on 3782 degrees of freedom
## (13078 observations deleted due to missingness)
## Multiple R-squared: 0.3898, Adjusted R-squared: 0.3858
## F-statistic: 96.65 on 25 and 3782 DF, p-value: < 2.2e-16
delete black:
mod11 = lm(menthlth ~ sex + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod11)
##
## Call:
## lm(formula = menthlth ~ sex + asian + educ2 + income1 + income4 +
## income6 + income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2626 -1.0407 -0.0406 0.9574 21.0823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.1316475 1.4555314 9.022 < 2e-16 ***
## sex 1.3204685 0.6324717 2.088 0.03688 *
## asian -0.1872403 0.2658969 -0.704 0.48136
## educ2 1.3279972 0.4858312 2.733 0.00630 **
## income1 -6.9002780 1.1488503 -6.006 2.07e-09 ***
## income4 -2.2420937 0.8440959 -2.656 0.00793 **
## income6 -1.0846417 0.7564290 -1.434 0.15168
## income7 -0.7967446 0.7859403 -1.014 0.31077
## married -1.2526395 0.5193240 -2.412 0.01591 *
## employed -3.1059396 0.5549723 -5.597 2.33e-08 ***
## out_of_work 5.3144316 1.3061909 4.069 4.82e-05 ***
## homemaker -2.8937121 1.0514939 -2.752 0.00595 **
## student -3.7268272 2.2898653 -1.628 0.10370
## hlthplan -3.4181576 0.8010686 -4.267 2.03e-05 ***
## exercise -1.3927426 0.6581059 -2.116 0.03438 *
## smoke 4.3199347 0.7089008 6.094 1.21e-09 ***
## drink -3.2331399 0.4193523 -7.710 1.58e-14 ***
## genhlth_ex 2.4471046 0.9505498 2.574 0.01008 *
## genhlth_go 1.0592587 0.7930507 1.336 0.18173
## genhlth_fa 2.9364303 1.0255170 2.863 0.00421 **
## genhlth_po 14.1865856 1.3531702 10.484 < 2e-16 ***
## qlactlm 3.9966302 0.6900912 5.791 7.52e-09 ***
## pm2.5 -0.0008375 0.0176943 -0.047 0.96225
## ozone 15.2872421 7.3472445 2.081 0.03753 *
## greenness 1.5439223 0.3303762 4.673 3.06e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.151 on 3950 degrees of freedom
## (12911 observations deleted due to missingness)
## Multiple R-squared: 0.3987, Adjusted R-squared: 0.395
## F-statistic: 109.1 on 24 and 3950 DF, p-value: < 2.2e-16
delete asian:
mod12 = lm(menthlth ~ sex + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod12)
##
## Call:
## lm(formula = menthlth ~ sex + educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6846 -1.2947 -0.0493 1.1526 22.6382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.255664 0.916346 11.192 < 2e-16 ***
## sex -0.061897 0.406604 -0.152 0.879010
## educ2 0.571854 0.311227 1.837 0.066174 .
## income1 -0.750505 0.713574 -1.052 0.292934
## income4 0.332985 0.551755 0.604 0.546187
## income6 0.400030 0.476289 0.840 0.400988
## income7 -0.874572 0.489511 -1.787 0.074025 .
## married -0.122057 0.344987 -0.354 0.723493
## employed -1.831612 0.367889 -4.979 6.49e-07 ***
## out_of_work 4.015560 0.887239 4.526 6.07e-06 ***
## homemaker -1.301440 0.681900 -1.909 0.056345 .
## student -7.871992 1.356813 -5.802 6.73e-09 ***
## hlthplan -1.802891 0.517532 -3.484 0.000497 ***
## exercise -2.011671 0.408545 -4.924 8.60e-07 ***
## smoke 6.260483 0.438637 14.273 < 2e-16 ***
## drink -2.939469 0.274782 -10.697 < 2e-16 ***
## genhlth_ex 0.993972 0.587334 1.692 0.090608 .
## genhlth_go -0.012063 0.483617 -0.025 0.980101
## genhlth_fa 2.789457 0.631444 4.418 1.01e-05 ***
## genhlth_po 12.451268 0.836273 14.889 < 2e-16 ***
## qlactlm 2.479303 0.446270 5.556 2.83e-08 ***
## pm2.5 0.004621 0.013444 0.344 0.731040
## ozone 32.239021 5.646010 5.710 1.16e-08 ***
## greenness 2.889014 0.261178 11.061 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11367 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3006
## F-statistic: 213.9 on 23 and 11367 DF, p-value: < 2.2e-16
delete sex:
mod13 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod13)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6824 -1.2946 -0.0497 1.1532 22.6351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.206697 0.857994 11.896 < 2e-16 ***
## educ2 0.573939 0.310912 1.846 0.064921 .
## income1 -0.750924 0.713538 -1.052 0.292641
## income4 0.334825 0.551598 0.607 0.543857
## income6 0.402370 0.476021 0.845 0.397973
## income7 -0.872864 0.489361 -1.784 0.074502 .
## married -0.112655 0.339399 -0.332 0.739951
## employed -1.830517 0.367803 -4.977 6.56e-07 ***
## out_of_work 4.016963 0.887153 4.528 6.02e-06 ***
## homemaker -1.322031 0.668321 -1.978 0.047937 *
## student -7.871466 1.356751 -5.802 6.74e-09 ***
## hlthplan -1.805689 0.517184 -3.491 0.000482 ***
## exercise -2.007987 0.407810 -4.924 8.61e-07 ***
## smoke 6.262179 0.438477 14.282 < 2e-16 ***
## drink -2.932186 0.270573 -10.837 < 2e-16 ***
## genhlth_ex 0.992737 0.587253 1.690 0.090964 .
## genhlth_go -0.012455 0.483589 -0.026 0.979453
## genhlth_fa 2.791423 0.631285 4.422 9.88e-06 ***
## genhlth_po 12.454114 0.836028 14.897 < 2e-16 ***
## qlactlm 2.480392 0.446194 5.559 2.77e-08 ***
## pm2.5 0.004536 0.013431 0.338 0.735610
## ozone 32.281336 5.638920 5.725 1.06e-08 ***
## greenness 2.887478 0.260972 11.064 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11368 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3007
## F-statistic: 223.6 on 22 and 11368 DF, p-value: < 2.2e-16
delete genhlth_go:
mod14 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod14)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + married + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_ex +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.680 -1.294 -0.050 1.153 22.634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.197105 0.772912 13.193 < 2e-16 ***
## educ2 0.573085 0.309129 1.854 0.06378 .
## income1 -0.751498 0.713158 -1.054 0.29201
## income4 0.334921 0.551562 0.607 0.54372
## income6 0.402592 0.475922 0.846 0.39761
## income7 -0.872368 0.488961 -1.784 0.07443 .
## married -0.112239 0.338998 -0.331 0.74058
## employed -1.829920 0.367057 -4.985 6.27e-07 ***
## out_of_work 4.017215 0.887060 4.529 6.00e-06 ***
## homemaker -1.322044 0.668292 -1.978 0.04793 *
## student -7.871583 1.356683 -5.802 6.72e-09 ***
## hlthplan -1.804020 0.513087 -3.516 0.00044 ***
## exercise -2.007095 0.406319 -4.940 7.94e-07 ***
## smoke 6.261979 0.438389 14.284 < 2e-16 ***
## drink -2.931424 0.268940 -10.900 < 2e-16 ***
## genhlth_ex 0.999657 0.522162 1.914 0.05559 .
## genhlth_fa 2.798601 0.566406 4.941 7.88e-07 ***
## genhlth_po 12.461662 0.782942 15.916 < 2e-16 ***
## qlactlm 2.479634 0.445202 5.570 2.61e-08 ***
## pm2.5 0.004525 0.013425 0.337 0.73607
## ozone 32.290143 5.628296 5.737 9.88e-09 ***
## greenness 2.887963 0.260282 11.096 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.768 on 11369 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3007
## F-statistic: 234.3 on 21 and 11369 DF, p-value: < 2.2e-16
delete married:
mod15 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod15)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 +
## income7 + employed + out_of_work + homemaker + student +
## hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.6631 -1.2933 -0.0495 1.1533 22.6260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.150332 0.759862 13.358 < 2e-16 ***
## educ2 0.567198 0.308605 1.838 0.066097 .
## income1 -0.702147 0.697380 -1.007 0.314035
## income4 0.346595 0.550412 0.630 0.528902
## income6 0.392301 0.474887 0.826 0.408769
## income7 -0.897508 0.483010 -1.858 0.063173 .
## employed -1.842952 0.364926 -5.050 4.48e-07 ***
## out_of_work 4.028979 0.886313 4.546 5.53e-06 ***
## homemaker -1.361293 0.657668 -2.070 0.038486 *
## student -7.818007 1.346946 -5.804 6.64e-09 ***
## hlthplan -1.816531 0.511673 -3.550 0.000387 ***
## exercise -2.011937 0.406040 -4.955 7.34e-07 ***
## smoke 6.277095 0.435988 14.397 < 2e-16 ***
## drink -2.920737 0.266986 -10.940 < 2e-16 ***
## genhlth_ex 1.002233 0.522083 1.920 0.054923 .
## genhlth_fa 2.810562 0.565231 4.972 6.71e-07 ***
## genhlth_po 12.464606 0.782861 15.922 < 2e-16 ***
## qlactlm 2.477284 0.445128 5.565 2.68e-08 ***
## pm2.5 0.004938 0.013366 0.369 0.711816
## ozone 32.206553 5.622410 5.728 1.04e-08 ***
## greenness 2.886521 0.260235 11.092 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11370 degrees of freedom
## (5495 observations deleted due to missingness)
## Multiple R-squared: 0.302, Adjusted R-squared: 0.3008
## F-statistic: 246 on 20 and 11370 DF, p-value: < 2.2e-16
delete income4:
mod16 = lm(menthlth ~ educ2 + income1 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod16)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income6 + income7 +
## employed + out_of_work + homemaker + student + hlthplan +
## exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po +
## qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.7490 -1.2902 -0.0484 1.1506 22.7061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.267050 0.742162 13.834 < 2e-16 ***
## educ2 0.602904 0.304090 1.983 0.047430 *
## income1 -0.792955 0.689181 -1.151 0.249930
## income6 0.349901 0.470356 0.744 0.456948
## income7 -0.987105 0.473388 -2.085 0.037074 *
## employed -1.863608 0.362637 -5.139 2.81e-07 ***
## out_of_work 4.022371 0.885837 4.541 5.66e-06 ***
## homemaker -1.390504 0.655896 -2.120 0.034027 *
## student -7.829242 1.343951 -5.826 5.85e-09 ***
## hlthplan -1.853019 0.509587 -3.636 0.000278 ***
## exercise -2.009024 0.405708 -4.952 7.45e-07 ***
## smoke 6.266625 0.435244 14.398 < 2e-16 ***
## drink -2.933354 0.266005 -11.027 < 2e-16 ***
## genhlth_ex 0.986282 0.521525 1.891 0.058630 .
## genhlth_fa 2.832592 0.564557 5.017 5.32e-07 ***
## genhlth_po 12.443779 0.782558 15.901 < 2e-16 ***
## qlactlm 2.479736 0.444978 5.573 2.57e-08 ***
## pm2.5 0.003508 0.013286 0.264 0.791767
## ozone 32.146947 5.615236 5.725 1.06e-08 ***
## greenness 2.888105 0.260095 11.104 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11382 degrees of freedom
## (5484 observations deleted due to missingness)
## Multiple R-squared: 0.3019, Adjusted R-squared: 0.3008
## F-statistic: 259.1 on 19 and 11382 DF, p-value: < 2.2e-16
delete income6:
mod17 = lm(menthlth ~ educ2 + income1 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod17)
##
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income7 + employed +
## out_of_work + homemaker + student + hlthplan + exercise +
## smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm +
## pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.782 -1.288 -0.048 1.154 22.693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.313364 0.738347 13.968 < 2e-16 ***
## educ2 0.623898 0.302038 2.066 0.038886 *
## income1 -0.860746 0.683104 -1.260 0.207677
## income7 -1.048633 0.467090 -2.245 0.024785 *
## employed -1.847903 0.362151 -5.103 3.40e-07 ***
## out_of_work 3.990747 0.884935 4.510 6.56e-06 ***
## homemaker -1.402596 0.655731 -2.139 0.032459 *
## student -7.839802 1.343049 -5.837 5.45e-09 ***
## hlthplan -1.829015 0.508746 -3.595 0.000326 ***
## exercise -1.994564 0.405384 -4.920 8.77e-07 ***
## smoke 6.295352 0.433862 14.510 < 2e-16 ***
## drink -2.941625 0.265832 -11.066 < 2e-16 ***
## genhlth_ex 0.972403 0.521240 1.866 0.062129 .
## genhlth_fa 2.807990 0.563156 4.986 6.25e-07 ***
## genhlth_po 12.397304 0.780740 15.879 < 2e-16 ***
## qlactlm 2.484758 0.444737 5.587 2.36e-08 ***
## pm2.5 0.003098 0.013272 0.233 0.815439
## ozone 32.069417 5.611449 5.715 1.12e-08 ***
## greenness 2.876051 0.259553 11.081 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.767 on 11384 degrees of freedom
## (5483 observations deleted due to missingness)
## Multiple R-squared: 0.3019, Adjusted R-squared: 0.3008
## F-statistic: 273.5 on 18 and 11384 DF, p-value: < 2.2e-16
delete income1:
mod18 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod18)
##
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 +
## ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.544 -1.304 -0.049 1.165 22.762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.3135973 0.7240722 14.244 < 2e-16 ***
## educ2 0.6468932 0.3008295 2.150 0.031547 *
## income7 -0.9626900 0.4623028 -2.082 0.037330 *
## employed -1.8684210 0.3599958 -5.190 2.14e-07 ***
## out_of_work 3.9239208 0.8813191 4.452 8.57e-06 ***
## homemaker -1.4021883 0.6518621 -2.151 0.031493 *
## student -7.9603739 1.3276376 -5.996 2.08e-09 ***
## hlthplan -1.7880862 0.4993371 -3.581 0.000344 ***
## exercise -2.0982285 0.4033926 -5.201 2.01e-07 ***
## smoke 6.2490540 0.4319204 14.468 < 2e-16 ***
## drink -2.9527947 0.2640249 -11.184 < 2e-16 ***
## genhlth_ex 0.9570070 0.5187859 1.845 0.065106 .
## genhlth_fa 2.6284833 0.5539079 4.745 2.11e-06 ***
## genhlth_po 12.0485502 0.7658576 15.732 < 2e-16 ***
## qlactlm 2.4617039 0.4432345 5.554 2.85e-08 ***
## pm2.5 0.0007633 0.0132234 0.058 0.953971
## ozone 33.2546829 5.5915006 5.947 2.80e-09 ***
## greenness 2.9106861 0.2586182 11.255 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11486 degrees of freedom
## (5382 observations deleted due to missingness)
## Multiple R-squared: 0.3036, Adjusted R-squared: 0.3025
## F-statistic: 294.5 on 17 and 11486 DF, p-value: < 2.2e-16
delete genhlth_ex:
mod19 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod19)
##
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work +
## homemaker + student + hlthplan + exercise + smoke + drink +
## genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,
## data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.5456 -1.3045 -0.0468 1.1641 22.5928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.454769 0.720140 14.518 < 2e-16 ***
## educ2 0.526837 0.293471 1.795 0.07265 .
## income7 -0.963099 0.462321 -2.083 0.03726 *
## employed -1.803972 0.358376 -5.034 4.88e-07 ***
## out_of_work 3.936364 0.880367 4.471 7.85e-06 ***
## homemaker -1.304757 0.649767 -2.008 0.04466 *
## student -7.804755 1.324278 -5.894 3.88e-09 ***
## hlthplan -1.832259 0.498734 -3.674 0.00024 ***
## exercise -2.018860 0.401007 -5.034 4.86e-07 ***
## smoke 6.191872 0.430883 14.370 < 2e-16 ***
## drink -2.891994 0.261827 -11.045 < 2e-16 ***
## genhlth_fa 2.503150 0.549823 4.553 5.35e-06 ***
## genhlth_po 11.963037 0.764544 15.647 < 2e-16 ***
## qlactlm 2.369105 0.440025 5.384 7.43e-08 ***
## pm2.5 -0.001208 0.013184 -0.092 0.92698
## ozone 34.062301 5.574115 6.111 1.02e-09 ***
## greenness 2.956187 0.257457 11.482 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11488 degrees of freedom
## (5381 observations deleted due to missingness)
## Multiple R-squared: 0.3034, Adjusted R-squared: 0.3024
## F-statistic: 312.7 on 16 and 11488 DF, p-value: < 2.2e-16
delete educ2:
mod20 = lm(menthlth ~ income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod20)
##
## Call:
## lm(formula = menthlth ~ income7 + employed + out_of_work + homemaker +
## student + hlthplan + exercise + smoke + drink + genhlth_fa +
## genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.680 -1.299 -0.054 1.161 22.680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.8950021 0.6771652 16.089 < 2e-16 ***
## income7 -0.9797097 0.4622734 -2.119 0.034084 *
## employed -1.8655613 0.3567650 -5.229 1.73e-07 ***
## out_of_work 3.9186037 0.8803965 4.451 8.63e-06 ***
## homemaker -1.2810520 0.6496960 -1.972 0.048660 *
## student -8.0383517 1.3179967 -6.099 1.10e-09 ***
## hlthplan -1.8849753 0.4979174 -3.786 0.000154 ***
## exercise -2.1674331 0.3924119 -5.523 3.40e-08 ***
## smoke 6.3684068 0.4195520 15.179 < 2e-16 ***
## drink -3.0081166 0.2537350 -11.855 < 2e-16 ***
## genhlth_fa 2.5707795 0.5485835 4.686 2.82e-06 ***
## genhlth_po 11.9444866 0.7645486 15.623 < 2e-16 ***
## qlactlm 2.3221602 0.4392893 5.286 1.27e-07 ***
## pm2.5 -0.0001499 0.0131717 -0.011 0.990923
## ozone 33.5504328 5.5673557 6.026 1.73e-09 ***
## greenness 2.9562341 0.2574818 11.481 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.769 on 11489 degrees of freedom
## (5381 observations deleted due to missingness)
## Multiple R-squared: 0.3032, Adjusted R-squared: 0.3023
## F-statistic: 333.2 on 15 and 11489 DF, p-value: < 2.2e-16
this is the final regression model considering the relationship:
if we delete ozone:
mod17 = lm(menthlth ~ black + educ2 + income1 + income4 + married + employed + out_of_work + homemaker + student + hlthplan + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5,data = reg_data)
summary(mod17)
##
## Call:
## lm(formula = menthlth ~ black + educ2 + income1 + income4 + married +
## employed + out_of_work + homemaker + student + hlthplan +
## smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm +
## pm2.5, data = reg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3218 -1.0317 -0.0736 0.9414 21.7154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.74686 0.93724 14.667 < 2e-16 ***
## black 0.37834 0.11252 3.363 0.000778 ***
## educ2 1.53540 0.42476 3.615 0.000304 ***
## income1 -5.86823 1.01930 -5.757 9.09e-09 ***
## income4 -1.72439 0.76284 -2.260 0.023836 *
## married -1.03376 0.44919 -2.301 0.021414 *
## employed -3.26315 0.49272 -6.623 3.92e-11 ***
## out_of_work 4.40985 1.18811 3.712 0.000208 ***
## homemaker -2.56811 0.92069 -2.789 0.005303 **
## student -4.62055 1.98946 -2.323 0.020247 *
## hlthplan -3.08556 0.71042 -4.343 1.43e-05 ***
## smoke 5.32836 0.61954 8.601 < 2e-16 ***
## drink -3.83040 0.35301 -10.851 < 2e-16 ***
## genhlth_ex 2.15958 0.74362 2.904 0.003700 **
## genhlth_fa 2.72990 0.81743 3.340 0.000845 ***
## genhlth_po 15.63954 1.13876 13.734 < 2e-16 ***
## qlactlm 3.72105 0.63389 5.870 4.65e-09 ***
## pm2.5 0.03442 0.01414 2.434 0.014984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.069 on 4777 degrees of freedom
## (12091 observations deleted due to missingness)
## Multiple R-squared: 0.4007, Adjusted R-squared: 0.3986
## F-statistic: 187.9 on 17 and 4777 DF, p-value: < 2.2e-16
aggregate into state level:
st.codes<-data.frame(
state=c(1,2,4,5,6,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72),full=c("alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia","florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky","louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana", "nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio","oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah","vermont","virginia","washington","west virginia","wisconsin","wyoming","puerto rico"))
write state level data for 2000:
library("SASxport")
library(dplyr)
data_00 <-read.xport("CDBRFS00.XPT")
data <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)
NA
data$CTYCODE[data$CTYCODE %in% c(777,999)] <- c(NA)
data$MENTHLTH[data$MENTHLTH %in% c(77,88,99)] <- c(NA)
data$CVDINFAR[data$CVDINFAR %in% c(7,9)] <- c(NA)
data$CVDCORHD[data$CVDCORHD %in% c(7,9)] <- c(NA)
data$CVDSTROK[data$CVDSTROK %in% c(7,9)] <- c(NA)
data$ASTHMA[data$ASTHMA %in% c(7,9)] <- c(NA)
data$AGE[data$AGE %in% c(7,9)] <- c(NA)
data$ORACE[data$ORACE %in% c(7,9)] <- c(NA)
data$EDUCA[data$EDUCA == 9] <- c(NA)
data$INCOME2[data$INCOME2 %in% c(77,99)] <- c(NA)
data$MARITAL[data$MARITAL == 9] <- c(NA)
data$EMPLOY[data$EMPLOY == 9] <- c(NA)
data$HLTHPLAN[data$HLTHPLAN %in% c(7,9)] <- c(NA)
data$EXERANY[data$EXERANY %in% c(7,9)] <- c(NA)
data$X.RFSMOK2[data$X.RFSMOK2 == 9] <- c(NA)
data$DRINKANY[data$DRINKANY %in% c(7,9)] <- c(NA)
data$X.BMI2CAT[data$X.BMI2CAT == 9] <- c(NA)
data$X.BMI2[data$X.BMI2 == 999] <- c(NA)
data$GENHLTH[data$GENHLTH %in% c(7,9)] <- c(NA)
data$QLACTLM2[data$QLACTLMT %in% c(7,9)] <- c(NA)
SELECT
data00 <-data %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2000,
menthlth = mean(MENTHLTH, na.rm= TRUE),
heartattack = mean(CVDINFAR == 1, na.rm = TRUE),
ACheartdis = mean(CVDCORHD == 1, na.rm = TRUE),
stroke = mean(CVDSTROK == 1, na.rm = TRUE),
asthma = mean(ASTHMA == 1, na.rm = TRUE),
age = mean(AGE, na.rm = TRUE),
sex = mean(SEX == 2, na.rm = TRUE),
white = mean(ORACE == 1, na.rm = TRUE),
black = mean(ORACE == 2, na.rm = TRUE),
asian = mean(ORACE == 3, na.rm = TRUE),
race_other = mean(ORACE == 4 | ORACE == 5 | ORACE == 6, na.rm = TRUE),
educ1 = mean(EDUCA == 1|EDUCA == 2, na.rm = TRUE),
educ2 = mean(EDUCA == 3|EDUCA == 4, na.rm = TRUE),
educ3 = mean(EDUCA == 5|EDUCA == 6, na.rm = TRUE),
income1 = mean(INCOME2 == 1, na.rm = TRUE),
income2 = mean(INCOME2 == 2, na.rm = TRUE),
income3 = mean(INCOME2 == 3, na.rm = TRUE),
income4 = mean(INCOME2 == 4, na.rm = TRUE),
income5 = mean(INCOME2 == 5, na.rm = TRUE),
income6 = mean(INCOME2 == 6, na.rm = TRUE),
income7 = mean(INCOME2 == 7, na.rm = TRUE),
income8 = mean(INCOME2 == 8, na.rm = TRUE),
married = mean(MARITAL == 1, na.rm = TRUE),
unmarried = mean(MARITAL == 2| MARITAL == 3| MARITAL == 4| MARITAL == 5| MARITAL == 6, na.rm = TRUE),
employed = mean(EMPLOY == 1 | EMPLOY == 2, na.rm = TRUE),
out_of_work = mean(EMPLOY ==3 | EMPLOY ==4, na.rm = TRUE),
homemaker = mean(EMPLOY ==5, na.rm = TRUE),
student = mean(EMPLOY == 6, na.rm = TRUE),
employ_other = mean(EMPLOY ==7 | EMPLOY ==8, na.rm = TRUE),
hlthplan = mean(HLTHPLAN == 1, na.rm = TRUE),
exercise = mean(EXERANY == 1, na.rm = TRUE),
smoke = mean(X.RFSMOK2 == 2, na.rm = TRUE),
drink = mean(DRINKANY == 1, na.rm = TRUE),
bmi_cat1 = mean(X.BMI2CAT == 1, na.rm =TRUE),
bmi_cat2 = mean(X.BMI2CAT == 2, na.rm = TRUE),
bmi_cat3 = mean(X.BMI2CAT == 3, na.rm = TRUE),
bmi_cts = mean(X.BMI2,na.rm = TRUE),
genhlth_ex = mean(GENHLTH == 1, na.rm = TRUE),
genhlth_vg = mean(GENHLTH == 2, na.rm = TRUE),
genhlth_go = mean(GENHLTH == 3, na.rm = TRUE),
genhlth_fa = mean(GENHLTH == 4, na.rm = TRUE),
genhlth_po = mean(GENHLTH == 5, na.rm = TRUE),
qlactlm = mean(QLACTLMT == 1, na.rm = TRUE))
colnames(data00)[1] = "state"
data00 = data00 %>%
left_join(st.codes,by = c("state"="state"))
write.csv(data00, file = "state_00.csv")
Mental health patterns in 2000 for all US states in a map:
library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")
data00 = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
state_00 = data00 %>%
dplyr::select(full,menthlth)
max(state_00$menthlth)
## [1] NaN
min(state_00$menthlth)
## [1] NaN
ggplot(state_00,aes(map_id = full)) +
geom_map(aes(fill = menthlth),map = fifty_states) +
expand_limits(x = fifty_states$long,y = fifty_states$lat) +
coord_map() +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
breaks = seq(7,18,,length.out = 5),
limits = c(7,18),
labels = seq(7,18,length.out = 5)) +
ylab("") +
xlab("")
Select average menthlth for each state across years and write it into a csv file, and then plot mental health across states:
library(SASxport)
data_00 <-read.xport("CDBRFS00.XPT")
data_00 <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)
men_00_state = data_00 %>%
mutate(year = 2000) %>%
select(X.STATE,MENTHLTH,year)
men_00_state$MENTHLTH[men_00_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_00_state <- men_00_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2000,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_01 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS01.XPT")
data_01 <- data_01 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)
men_01_state = data_01 %>%
mutate(year = 2001) %>%
select(X.STATE,MENTHLTH,year)
men_01_state$MENTHLTH[men_01_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_01_state <- men_01_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2001,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_02 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs02.xpt")
data_02 <- data_02 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)
men_02_state = data_02 %>%
mutate(year = 2002) %>%
select(X.STATE,MENTHLTH,year)
men_02_state$MENTHLTH[men_02_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_02_state <- men_02_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2002,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_03 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs03.xpt")
data_03 <- data_03 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI3CAT,X.BMI3,GENHLTH,QLACTLM2)
men_03_state = data_03 %>%
mutate(year = 2003) %>%
select(X.STATE,MENTHLTH,year)
men_03_state$MENTHLTH[men_03_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_03_state <- men_03_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2003,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_04 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS04.XPT")
data_04 <- data_04 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_04_state = data_04 %>%
mutate(year = 2004) %>%
select(X.STATE,MENTHLTH,year)
men_04_state$MENTHLTH[men_04_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_04_state <- men_04_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2004,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_05 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS05.XPT")
data_05 <- data_05 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_05_state = data_05 %>%
mutate(year = 2005) %>%
select(X.STATE,MENTHLTH,year)
men_05_state$MENTHLTH[men_05_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_05_state <- men_05_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2005,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_06 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS06.XPT")
data_06 <- data_06 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_06_state = data_06 %>%
mutate(year = 2006) %>%
select(X.STATE,MENTHLTH,year)
men_06_state$MENTHLTH[men_06_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_06_state <- men_06_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2006,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_07 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS07.XPT")
data_07 <- data_07 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR4,CVDCRHD4,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)
men_07_state = data_07 %>%
mutate(year = 2007) %>%
select(X.STATE,MENTHLTH,year)
men_07_state$MENTHLTH[men_07_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_07_state <- men_07_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2007,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_08 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS08.XPT")
data_08 <- data_08 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_08_state = data_08 %>%
mutate(year = 2008) %>%
select(X.STATE,MENTHLTH,year)
men_08_state$MENTHLTH[men_08_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_08_state <- men_08_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2008,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_09 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS09.XPT")
data_09 <- data_09 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_09_state = data_09 %>%
mutate(year = 2009) %>%
select(X.STATE,MENTHLTH,year)
men_09_state$MENTHLTH[men_09_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_09_state <- men_09_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2009,
menthlth = mean(MENTHLTH, na.rm= TRUE))
data_10 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS10.XPT")
data_10 <- data_10 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
filter(!is.na(fips)) %>%
select(fips,X.STATE, CTYCODE, IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)
men_10_state = data_10 %>%
mutate(year = 2010) %>%
select(X.STATE,MENTHLTH,year)
men_10_state$MENTHLTH[men_10_state$MENTHLTH %in% c(77,88,99)] <- c(NA)
men_10_state <- men_10_state %>% group_by(X.STATE) %>%
dplyr::summarise(year = 2010,
menthlth = mean(MENTHLTH, na.rm= TRUE))
plot all years together:
men_state = rbind(men_00_state,men_01_state,men_02_state,men_03_state,men_04_state,men_05_state,men_06_state,men_07_state,men_08_state,men_09_state,men_10_state)
colnames(men_state)[1] = "state"
men_state = men_state %>%
left_join(st.codes,by = c("state"="state"))
write_csv(men_state,"state_menthlth.csv")
library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")
men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
state_map = men_state %>%
dplyr::select(full,menthlth,year)
max(state_map$menthlth,na.rm = T)
## [1] 18.61704
min(state_map$menthlth,na.rm = T)
## [1] 7.455927
ggplot(state_map,aes(map_id = full)) +
geom_map(aes(fill = menthlth),map = fifty_states) +
expand_limits(x = fifty_states$long,y = fifty_states$lat) +
coord_map() +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
breaks = seq(7,19,,length.out = 5),
limits = c(7,19),
labels = seq(7,19,length.out = 5)) +
ylab("") +
xlab("") +
facet_wrap(~year,ncol = 3) +
ggtitle("Number of Days Mental Health Not Good for each US state from 2000 to 2010")
Run regression for different years and get a univariate plot each year:
library(readr)
library(dplyr)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
## map(): purrr, maps
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(pm2.5, menthlth)) +
geom_point(alpha = 0.2,col = "dodgerblue1") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("mental health vs pm 2.5")
group_men_fit
group_men_fit1 = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(menthlth,ozone)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ year) +
ggtitle("mental health vs ozone")
group_men_fit1
univariate analysis for menthlth and pm 2.5:
data_all %>%
group_by(year) %>%
do(tidy(lm(menthlth ~ pm2.5, data = .),conf.int = TRUE)) %>%
filter(!grepl("Intercept", term))
## # A tibble: 11 x 8
## # Groups: year [11]
## year term estimate std.error statistic p.value conf.low
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2000 pm2.5 0.2509847 0.03115326 8.056450 3.212205e-15 0.18982374
## 2 2001 pm2.5 0.2349882 0.03506758 6.701009 3.889138e-11 0.16615350
## 3 2002 pm2.5 0.4076348 0.05853970 6.963390 1.331356e-11 0.29255856
## 4 2003 pm2.5 0.1898703 0.03484784 5.448552 6.337765e-08 0.12149025
## 5 2004 pm2.5 0.2286335 0.02852119 8.016269 2.633883e-15 0.17267485
## 6 2005 pm2.5 0.2030903 0.02435099 8.340126 1.957300e-16 0.15531651
## 7 2006 pm2.5 0.2991832 0.03221822 9.286151 7.911323e-20 0.23596916
## 8 2007 pm2.5 0.3296625 0.03427715 9.617557 1.777878e-21 0.26244336
## 9 2008 pm2.5 0.2109055 0.04081043 5.167931 2.579768e-07 0.13087465
## 10 2009 pm2.5 0.1792752 0.05146552 3.483405 5.047276e-04 0.07834929
## 11 2010 pm2.5 0.2650898 0.03630722 7.301296 3.960809e-13 0.19388981
## # ... with 1 more variables: conf.high <dbl>
heatmap for menthlth across US states from 2000 to 2010:
library(RColorBrewer)
men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
## state = col_integer(),
## year = col_double(),
## menthlth = col_double(),
## full = col_character()
## )
men_state %>% filter(!is.na(menthlth) & !is.na(full)) %>%
ggplot(aes(x = year, y = full,fill = menthlth)) +
geom_tile(color = "grey50") +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "YlOrRd"), trans = "sqrt") +
ylab("state") +
xlab("year") +
ggtitle("Heatmap of average menthlth in US states from 2000 to 2010")
As the year increases from 2000 to 2010, it shows that quite a few counties in the south part of U.S. have the worst average mental health condition.
deal with raw ndvi data and study the relationship between greenness and menthlth:
data_all = read_csv("data_exposure_00_10.csv")
ndvi_county = ndvi %>%
group_by(FIPS,year) %>%
summarise(greenness = mean(ndvimean_combined))
data_all = data_all %>%
left_join(ndvi_county,by=c("fips"="FIPS","year"="year"))
write_csv(data_all,"data_3_expo.csv")
data_all = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit2 = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(menthlth,greenness)) +
geom_point() +
geom_smooth(method = "lm") +
facet_wrap(~ year)
group_men_fit2
Univariate analysis for income vs asthma:
library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_as_fit = data_all %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(income1,asthma)) +
geom_point(alpha = 0.2,col = "darkorchid2") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("income1 vs asthma") +
xlim(0,0.75) +
ylim(0,0.5)
group_as_fit
Univariate analysis for heartattack vs smoke:
library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer(),
## year = col_integer()
## )
## See spec(...) for full column specifications.
group_he_fit = data_all %>%
filter(year %in% c(2005,2006,2007,2008,2009,2010)) %>%
dplyr::select(-c(fips,state,county)) %>%
ggplot(aes(smoke,heartattack)) +
geom_point(alpha = 0.2,col = "indianred1") +
geom_smooth(method = "lm",col = "black") +
facet_wrap(~ year) +
ggtitle("smoke vs heartattack") +
xlim(0,0.6) +
ylim(0,0.5)
group_he_fit
Draw the Pearson correlation map:
data = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## fips = col_integer(),
## state = col_integer(),
## county = col_integer()
## )
## See spec(...) for full column specifications.
data = data %>%
dplyr::select(income7,employed,out_of_work,homemaker,student,hlthplan,exercise,smoke, drink,genhlth_fa ,genhlth_po ,qlactlm,pm2.5 ,ozone,greenness)
data = data[complete.cases(data),]
cormat <- round(cor(data),2)
head(cormat)
## income7 employed out_of_work homemaker student hlthplan
## income7 1.00 0.31 -0.11 -0.05 0.04 0.25
## employed 0.31 1.00 -0.25 -0.26 0.07 0.17
## out_of_work -0.11 -0.25 1.00 -0.07 -0.03 -0.21
## homemaker -0.05 -0.26 -0.07 1.00 -0.02 -0.14
## student 0.04 0.07 -0.03 -0.02 1.00 -0.10
## hlthplan 0.25 0.17 -0.21 -0.14 -0.10 1.00
## exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7 0.26 -0.13 0.29 -0.24 -0.28 -0.22 -0.05 0.02
## employed 0.42 -0.12 0.51 -0.43 -0.49 -0.53 -0.05 0.09
## out_of_work -0.10 0.09 -0.07 0.11 0.08 0.09 0.01 -0.06
## homemaker -0.10 0.03 -0.21 0.08 0.09 0.04 0.07 0.13
## student 0.14 0.04 0.06 -0.08 -0.09 -0.14 0.07 0.08
## hlthplan 0.25 -0.29 0.34 -0.26 -0.27 -0.14 0.03 -0.05
## greenness
## income7 -0.13
## employed -0.25
## out_of_work 0.07
## homemaker -0.03
## student -0.05
## hlthplan -0.04
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
melted_cormat <- melt(cormat)
head(melted_cormat)
## Var1 Var2 value
## 1 income7 income7 1.00
## 2 employed income7 0.31
## 3 out_of_work income7 -0.11
## 4 homemaker income7 -0.05
## 5 student income7 0.04
## 6 hlthplan income7 0.25
library(ggplot2)
ggplot(data = melted_cormat, aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
upper_tri
## income7 employed out_of_work homemaker student hlthplan
## income7 1 0.31 -0.11 -0.05 0.04 0.25
## employed NA 1.00 -0.25 -0.26 0.07 0.17
## out_of_work NA NA 1.00 -0.07 -0.03 -0.21
## homemaker NA NA NA 1.00 -0.02 -0.14
## student NA NA NA NA 1.00 -0.10
## hlthplan NA NA NA NA NA 1.00
## exercise NA NA NA NA NA NA
## smoke NA NA NA NA NA NA
## drink NA NA NA NA NA NA
## genhlth_fa NA NA NA NA NA NA
## genhlth_po NA NA NA NA NA NA
## qlactlm NA NA NA NA NA NA
## pm2.5 NA NA NA NA NA NA
## ozone NA NA NA NA NA NA
## greenness NA NA NA NA NA NA
## exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7 0.26 -0.13 0.29 -0.24 -0.28 -0.22 -0.05 0.02
## employed 0.42 -0.12 0.51 -0.43 -0.49 -0.53 -0.05 0.09
## out_of_work -0.10 0.09 -0.07 0.11 0.08 0.09 0.01 -0.06
## homemaker -0.10 0.03 -0.21 0.08 0.09 0.04 0.07 0.13
## student 0.14 0.04 0.06 -0.08 -0.09 -0.14 0.07 0.08
## hlthplan 0.25 -0.29 0.34 -0.26 -0.27 -0.14 0.03 -0.05
## exercise 1.00 -0.31 0.58 -0.47 -0.48 -0.39 -0.20 -0.01
## smoke NA 1.00 -0.24 0.24 0.28 0.21 0.17 0.09
## drink NA NA 1.00 -0.48 -0.54 -0.40 -0.21 -0.13
## genhlth_fa NA NA NA 1.00 0.25 0.40 0.12 -0.01
## genhlth_po NA NA NA NA 1.00 0.51 0.14 0.03
## qlactlm NA NA NA NA NA 1.00 0.00 -0.06
## pm2.5 NA NA NA NA NA NA 1.00 0.24
## ozone NA NA NA NA NA NA NA 1.00
## greenness NA NA NA NA NA NA NA NA
## greenness
## income7 -0.13
## employed -0.25
## out_of_work 0.07
## homemaker -0.03
## student -0.05
## hlthplan -0.04
## exercise -0.18
## smoke 0.16
## drink -0.25
## genhlth_fa 0.17
## genhlth_po 0.24
## qlactlm 0.20
## pm2.5 0.36
## ozone -0.27
## greenness 1.00
# Melt the correlation matrix
library(reshape2)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap
library(ggplot2)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
reorder_cormat <- function(cormat){
# Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
# Reorder the correlation matrix
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
# Print the heatmap
print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
Considering the possibility of colinearity, we also checked the correlation map to see if there are possible pairs that have high correlation. It turns out that it is fine!
try to draw the fitted line vs actual value in 2010: firstly I want to label each state but it turns out there are too much so I give up
library(ggrepel)
library(dplyr)
ds_theme_set()
data_all = read.csv("data_3_expo.csv")
st.codes<-data.frame(state=as.factor(c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")),
full=as.factor(c("alaska","alabama","arkansas","arizona","california","colorado", "connecticut","district of columbia","delaware","florida","georgia","hawaii","iowa","idaho","illinois","indiana","kansas","kentucky","louisiana","massachusetts","maryland","maine","michigan","minnesota","missouri","mississippi","montana","north carolina","north dakota","nebraska","new hampshire","new jersey","new mexico","nevada","new york","ohio","oklahoma","oregon","pennsylvania","puerto rico","rhode island","south carolina","south dakota","tennessee","texas","utah","virginia","vermont","washington","wisconsin","west virginia","wyoming")))
highlight <- c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")
fips_data = county.fips %>%
separate(polyname, c("state", "county"), ",")
data_all = data_all %>%
left_join(fips_data,by = c("fips"="fips")) %>%
left_join(st.codes,by = c("state.y" = "full"))
data_all_10 = data_all %>%
dplyr::filter(year == 2010) %>%
mutate(menthlth_hat = predict(mod20, newdata = .))
data_all_10 %>%
ggplot(aes(menthlth_hat,menthlth,col = state,label = state)) +
geom_point(show.legend = F) +
geom_abline() +
ggtitle("fitted values vs actual values of menthlth in 2010")
Take the year 2010 for an example, we drew a plot of actual “menthlth” values and fitted “menthlth” values, where the color represents different states in the U.S., and we could conclude that the points are roughly follow the fitted line as expected.